library(tidyverse)
## ── Attaching packages ──────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.2     ✓ purrr   0.3.4
## ✓ tibble  3.0.3     ✓ dplyr   1.0.2
## ✓ tidyr   1.1.2     ✓ stringr 1.4.0
## ✓ readr   1.3.1     ✓ forcats 0.5.0
## ── Conflicts ─────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(maps)
## 
## Attaching package: 'maps'
## The following object is masked from 'package:purrr':
## 
##     map
library(mapdata)
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
library(viridis)
## Loading required package: viridisLite
library(wesanderson)
# Download the CSSE COVID-19 daily report for 04-02-2020 and expose the
# longitude column as `Long` (the file names it `Long_`).
daily_report <-
  "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/04-02-2020.csv" %>%
  url() %>%
  read_csv() %>%
  rename(Long = Long_)
## Parsed with column specification:
## cols(
##   FIPS = col_double(),
##   Admin2 = col_character(),
##   Province_State = col_character(),
##   Country_Region = col_character(),
##   Last_Update = col_character(),
##   Lat = col_double(),
##   Long_ = col_double(),
##   Confirmed = col_double(),
##   Deaths = col_double(),
##   Recovered = col_double(),
##   Active = col_double(),
##   Combined_Key = col_character()
## )
# Preview the first rows of the loaded report.
head(daily_report)
## # A tibble: 6 x 12
##    FIPS Admin2 Province_State Country_Region Last_Update   Lat   Long Confirmed
##   <dbl> <chr>  <chr>          <chr>          <chr>       <dbl>  <dbl>     <dbl>
## 1 45001 Abbev… South Carolina US             4/2/20 23:…  34.2  -82.5         6
## 2 22001 Acadia Louisiana      US             4/2/20 23:…  30.3  -92.4        61
## 3 51001 Accom… Virginia       US             4/2/20 23:…  37.8  -75.6        10
## 4 16001 Ada    Idaho          US             4/2/20 23:…  43.5 -116.        312
## 5 19001 Adair  Iowa           US             4/2/20 23:…  41.3  -94.5         1
## 6 29001 Adair  Missouri       US             4/2/20 23:…  40.2  -92.6         6
## # … with 4 more variables: Deaths <dbl>, Recovered <dbl>, Active <dbl>,
## #   Combined_Key <chr>
# World map of confirmed cases in `daily_report`: one point per reporting
# location, sized by confirmed cases in thousands.
ggplot(daily_report, aes(x = Long, y = Lat, size = Confirmed / 1000)) +
  # Spell the argument `colour`: borders() has a formal `colour` parameter, so
  # passing `color` as well raises the "Duplicated aesthetics" warning.
  borders("world", colour = NA, fill = "grey90") +
  theme_bw() +
  # No constant `size` here; a fixed size would override the mapped size
  # aesthetic and drop the size legend configured below.
  geom_point(shape = 21, color = "purple", fill = "purple", alpha = 0.5) +
  # The argument is `title`; `titles` is not recognised as the plot title.
  labs(
    title = "World COVID-19 Confirmed Cases",
    x = "",
    y = "",
    size = "Cases (x1000)"
  ) +
  theme(legend.position = "right") +
  coord_fixed(ratio = 1.5)
## Warning: Duplicated aesthetics after name standardisation: colour
## Warning: Removed 54 rows containing missing values (geom_point).

Exercise 1

# Download the CSSE COVID-19 daily report for 09-26-2020 and expose the
# longitude column as `Long` (the file names it `Long_`).
daily_report_9_26_2020 <-
  "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/09-26-2020.csv" %>%
  url() %>%
  read_csv() %>%
  rename(Long = Long_)
## Parsed with column specification:
## cols(
##   FIPS = col_double(),
##   Admin2 = col_character(),
##   Province_State = col_character(),
##   Country_Region = col_character(),
##   Last_Update = col_character(),
##   Lat = col_double(),
##   Long_ = col_double(),
##   Confirmed = col_double(),
##   Deaths = col_double(),
##   Recovered = col_double(),
##   Active = col_double(),
##   Combined_Key = col_character(),
##   Incidence_Rate = col_double(),
##   `Case-Fatality_Ratio` = col_double()
## )
# Preview the first rows of the 09-26-2020 report.
head(daily_report_9_26_2020)
## # A tibble: 6 x 14
##    FIPS Admin2 Province_State Country_Region Last_Update   Lat   Long Confirmed
##   <dbl> <chr>  <chr>          <chr>          <chr>       <dbl>  <dbl>     <dbl>
## 1    NA <NA>   <NA>           Afghanistan    9/27/20 4:…  33.9  67.7      39192
## 2    NA <NA>   <NA>           Albania        9/27/20 4:…  41.2  20.2      13153
## 3    NA <NA>   <NA>           Algeria        9/27/20 4:…  28.0   1.66     50914
## 4    NA <NA>   <NA>           Andorra        9/27/20 4:…  42.5   1.52      1836
## 5    NA <NA>   <NA>           Angola         9/27/20 4:… -11.2  17.9       4672
## 6    NA <NA>   <NA>           Antigua and B… 9/27/20 4:…  17.1 -61.8         98
## # … with 6 more variables: Deaths <dbl>, Recovered <dbl>, Active <dbl>,
## #   Combined_Key <chr>, Incidence_Rate <dbl>, `Case-Fatality_Ratio` <dbl>
# World map of confirmed cases in `daily_report_9_26_2020`: one point per
# reporting location, sized by confirmed cases in thousands.
ggplot(daily_report_9_26_2020, aes(x = Long, y = Lat, size = Confirmed / 1000)) +
  # Spell the argument `colour`: borders() has a formal `colour` parameter, so
  # passing `color` as well raises the "Duplicated aesthetics" warning.
  borders("world", colour = NA, fill = "grey90") +
  theme_bw() +
  # No constant `size` here; a fixed size would override the mapped size
  # aesthetic and drop the size legend configured below.
  geom_point(shape = 21, color = "purple", fill = "purple", alpha = 0.5) +
  # The argument is `title`; `titles` is not recognised as the plot title.
  labs(
    title = "World COVID-19 Confirmed Cases: 9/26/2020",
    x = "",
    y = "",
    size = "Counts (x1000)"
  ) +
  theme(legend.position = "right") +
  coord_fixed(ratio = 1.1)
## Warning: Duplicated aesthetics after name standardisation: colour
## Warning: Removed 81 rows containing missing values (geom_point).

# Load the 04-05-2020 report and keep only US rows with a positive latitude
# whose Province_State is not in the exclusion list below.
daily_report <-
  read_csv(url("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/04-05-2020.csv")) %>%
  rename(Long = Long_) %>%
  filter(
    Country_Region == "US",
    !Province_State %in% c(
      "Alaska", "Hawaii", "American Samoa",
      "Puerto Rico", "Northern Mariana Islands",
      "Virgin Islands", "Recovered", "Guam", "Grand Princess",
      "District of Columbia", "Diamond Princess"
    ),
    Lat > 0
  )
## Parsed with column specification:
## cols(
##   FIPS = col_character(),
##   Admin2 = col_character(),
##   Province_State = col_character(),
##   Country_Region = col_character(),
##   Last_Update = col_datetime(format = ""),
##   Lat = col_double(),
##   Long_ = col_double(),
##   Confirmed = col_double(),
##   Deaths = col_double(),
##   Recovered = col_double(),
##   Active = col_double(),
##   Combined_Key = col_character()
## )
# US map of confirmed cases in `daily_report`: one point per row, sized by
# confirmed cases in thousands, drawn over state outlines.
ggplot(daily_report, aes(x = Long, y = Lat, size = Confirmed / 1000)) +
  borders("state", colour = "black", fill = "grey90") +
  theme_bw() +
  # No constant `size` here; a fixed size would override the mapped size
  # aesthetic and drop the size legend configured below.
  geom_point(shape = 21, color = "purple", fill = "purple", alpha = 0.5) +
  labs(
    title = "COVID-19 Confirmed Cases in the US",
    x = "",
    y = "",
    size = "Cases (x1000)"
  ) +
  theme(legend.position = "right") +
  coord_fixed(ratio = 1.5)

# Legend breaks and their labels, shared by the size and colour scales.
# The breaks must be distinct (the original listed 10000 twice) and the labels
# must describe non-overlapping ranges.
mybreaks <- c(1, 100, 1000, 10000, 50000)
mybreak_labels <- c(
  "1-99", "100-999", "1,000-9,999", "10,000-49,999", "50,000+"
)

# US map of confirmed cases in `daily_report`, with point size and colour both
# driven by the confirmed count.
ggplot(daily_report, aes(x = Long, y = Lat, size = Confirmed)) +
  borders("state", colour = "white", fill = "grey90") +
  # x, y and size are inherited from ggplot(). No constant `size` is set,
  # because it would override the mapped size and make the size scale below
  # dead code.
  geom_point(aes(color = Confirmed), stroke = 0, alpha = 0.7) +
  scale_size_continuous(
    name = "Cases", range = c(1, 7),
    breaks = mybreaks, labels = mybreak_labels
  ) +
  scale_color_viridis_c(
    option = "viridis", name = "Cases", trans = "log",
    breaks = mybreaks, labels = mybreak_labels
  ) +
  # Cleaning up the graph
  theme_void() +
  guides(colour = guide_legend()) +
  labs(title = "Anisa Dhana's layout for COVID-19 Confirmed Cases in the US") +
  theme(
    legend.position = "bottom",
    text = element_text(color = "#22211d"),
    plot.background = element_rect(fill = "#ffffff", color = NA),
    panel.background = element_rect(fill = "#ffffff", color = NA),
    legend.background = element_rect(fill = "#ffffff", color = NA)
  ) +
  coord_fixed(ratio = 1.5)
## Warning: Transformation introduced infinite values in discrete y-axis

Exercise 2

# Load the 09-26-2020 report and keep only US rows with a positive latitude
# whose Province_State is not in the exclusion list below.
daily_report_9_26_2020 <-
  read_csv(url("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/09-26-2020.csv")) %>%
  rename(Long = Long_) %>%
  filter(
    Country_Region == "US",
    !Province_State %in% c(
      "Alaska", "Hawaii", "American Samoa",
      "Puerto Rico", "Northern Mariana Islands",
      "Virgin Islands", "Recovered", "Guam", "Grand Princess",
      "District of Columbia", "Diamond Princess"
    ),
    Lat > 0
  )
## Parsed with column specification:
## cols(
##   FIPS = col_double(),
##   Admin2 = col_character(),
##   Province_State = col_character(),
##   Country_Region = col_character(),
##   Last_Update = col_character(),
##   Lat = col_double(),
##   Long_ = col_double(),
##   Confirmed = col_double(),
##   Deaths = col_double(),
##   Recovered = col_double(),
##   Active = col_double(),
##   Combined_Key = col_character(),
##   Incidence_Rate = col_double(),
##   `Case-Fatality_Ratio` = col_double()
## )
# Legend breaks and their labels, shared by the size and colour scales.
# The breaks must be distinct (the original listed 10000 twice) and the labels
# must describe non-overlapping ranges.
mybreaks <- c(1, 100, 1000, 10000, 50000)
mybreak_labels <- c(
  "1-99", "100-999", "1,000-9,999", "10,000-49,999", "50,000+"
)

# US map of confirmed cases in `daily_report_9_26_2020`, with point size and
# colour both driven by the confirmed count on a log scale.
ggplot(daily_report_9_26_2020, aes(x = Long, y = Lat, size = Confirmed)) +
  borders("state", colour = "white", fill = "grey90") +
  # x, y and size are inherited from ggplot(). No constant `size` is set,
  # because it would override the mapped size and make the size scale below
  # dead code.
  geom_point(aes(color = Confirmed), stroke = 0, alpha = 0.7) +
  scale_size_continuous(
    name = "Counts", trans = "log", range = c(1, 7),
    breaks = mybreaks, labels = mybreak_labels
  ) +
  scale_color_viridis_c(
    option = "viridis", name = "Counts", trans = "log",
    breaks = mybreaks, labels = mybreak_labels
  ) +
  # Cleaning up the graph
  theme_void() +
  guides(colour = guide_legend()) +
  labs(title = "Anisa Dhana's layout for COVID-19 Confirmed Cases in the US: 9/26/2020") +
  theme(
    legend.position = "bottom",
    text = element_text(color = "#22211d"),
    plot.background = element_rect(fill = "#ffffff", color = NA),
    panel.background = element_rect(fill = "#ffffff", color = NA),
    legend.background = element_rect(fill = "#ffffff", color = NA)
  ) +
  coord_fixed(ratio = 1.5)
## Warning: Transformation introduced infinite values in discrete y-axis

# Total confirmed cases per US Province_State from the 04-02-2020 report.
# Names are lower-cased so they can be joined to the `region` column of the
# state map data.
daily_report <-
  "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/04-02-2020.csv" %>%
  url() %>%
  read_csv() %>%
  rename(Long = Long_) %>%
  filter(Country_Region == "US") %>%
  group_by(Province_State) %>%
  summarise(Confirmed = sum(Confirmed)) %>%
  mutate(Province_State = tolower(Province_State))
## Parsed with column specification:
## cols(
##   FIPS = col_double(),
##   Admin2 = col_character(),
##   Province_State = col_character(),
##   Country_Region = col_character(),
##   Last_Update = col_character(),
##   Lat = col_double(),
##   Long_ = col_double(),
##   Confirmed = col_double(),
##   Deaths = col_double(),
##   Recovered = col_double(),
##   Active = col_double(),
##   Combined_Key = col_character()
## )
## `summarise()` ungrouping output (override with `.groups` argument)
# State outline polygons from the maps package.
us <- map_data("state")
# Attach each state's confirmed total to its polygon rows; map `region`
# is matched against the report's lower-cased `Province_State`.
state_join <- us %>%
  left_join(daily_report, by = c("region" = "Province_State"))
# Preview the polygon data.
head(us)
##        long      lat group order  region subregion
## 1 -87.46201 30.38968     1     1 alabama      <NA>
## 2 -87.48493 30.37249     1     2 alabama      <NA>
## 3 -87.52503 30.37249     1     3 alabama      <NA>
## 4 -87.53076 30.33239     1     4 alabama      <NA>
## 5 -87.57087 30.32665     1     5 alabama      <NA>
## 6 -87.58806 30.32665     1     6 alabama      <NA>
# State-level choropleth: each state polygon is filled by its confirmed total
# on a log10 colour scale.
ggplot(data = us, mapping = aes(x = long, y = lat, group = group)) +
  coord_fixed(1.3) +
  geom_polygon(data = state_join, aes(fill = Confirmed), color = "black") +
  scale_fill_gradientn(
    colours = wes_palette("Zissou1", 100, type = "continuous"),
    trans = "log10"
  ) +
  # Title previously ended with a stray apostrophe.
  labs(title = "COVID-19 Confirmed Cases in the US")

library(RColorBrewer)
# Confirmed totals per "county.state" key (lower-cased) for joining to the
# county map data.
# NOTE(review): the variable name says 03_27 but the file read is 04-02-2020;
# confirm which date is intended.
report_03_27_2020 <-
  "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/04-02-2020.csv" %>%
  url() %>%
  read_csv() %>%
  rename(Long = Long_) %>%
  unite(Key, Admin2, Province_State, sep = ".") %>%
  group_by(Key) %>%
  summarise(Confirmed = sum(Confirmed)) %>%
  mutate(Key = tolower(Key))
## Parsed with column specification:
## cols(
##   FIPS = col_double(),
##   Admin2 = col_character(),
##   Province_State = col_character(),
##   Country_Region = col_character(),
##   Last_Update = col_character(),
##   Lat = col_double(),
##   Long_ = col_double(),
##   Confirmed = col_double(),
##   Deaths = col_double(),
##   Recovered = col_double(),
##   Active = col_double(),
##   Combined_Key = col_character()
## )
## `summarise()` ungrouping output (override with `.groups` argument)
# State outlines and county polygons; counties get a "subregion.region" key
# that matches the key built in the report.
us <- map_data("state")
counties <- map_data("county") %>%
  unite(Key, subregion, region, sep = ".", remove = FALSE)

# Attach confirmed totals to every county polygon row.
state_join <- left_join(counties, report_03_27_2020, by = c("Key"))

# County-level choropleth on a log10 fill scale; counties with no match are
# drawn white.
ggplot(data = us, mapping = aes(x = long, y = lat, group = group)) +
  coord_fixed(1.3) +
  geom_polygon(data = state_join, aes(fill = Confirmed)) +
  # Draw state outlines after the county fills so the opaque county polygons
  # do not paint over them.
  borders("state", colour = "black") +
  scale_fill_gradientn(
    colors = brewer.pal(n = 5, name = "PuRd"),
    breaks = c(1, 10, 100, 1000, 10000, 100000),
    trans = "log10", na.value = "White"
  ) +
  ggtitle("Number of Confirmed Cases by US County") +
  theme_bw()
## Warning: Transformation introduced infinite values in discrete y-axis

Exercise 3

# Confirmed totals per "county.state" key (lower-cased) from the 09-26-2020
# report, for joining to the county map data.
report_09_26_2020 <-
  "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/09-26-2020.csv" %>%
  url() %>%
  read_csv() %>%
  rename(Long = Long_) %>%
  unite(Key, Admin2, Province_State, sep = ".") %>%
  group_by(Key) %>%
  summarise(Confirmed = sum(Confirmed)) %>%
  mutate(Key = tolower(Key))
## Parsed with column specification:
## cols(
##   FIPS = col_double(),
##   Admin2 = col_character(),
##   Province_State = col_character(),
##   Country_Region = col_character(),
##   Last_Update = col_character(),
##   Lat = col_double(),
##   Long_ = col_double(),
##   Confirmed = col_double(),
##   Deaths = col_double(),
##   Recovered = col_double(),
##   Active = col_double(),
##   Combined_Key = col_character(),
##   Incidence_Rate = col_double(),
##   `Case-Fatality_Ratio` = col_double()
## )
## `summarise()` ungrouping output (override with `.groups` argument)
# Preview the per-key confirmed totals.
head(report_09_26_2020)
## # A tibble: 6 x 2
##   Key                      Confirmed
##   <chr>                        <dbl>
## 1 abbeville.south carolina       578
## 2 acadia.louisiana              2946
## 3 accomack.virginia             1175
## 4 ada.idaho                    12831
## 5 adair.iowa                      72
## 6 adair.kentucky                 309
# State outlines and county polygons; counties get a "subregion.region" key
# that matches the key built in the report.
us_9_26_2020 <- map_data("state")
counties_9_26_2020 <- map_data("county") %>%
  unite(Key, subregion, region, sep = ".", remove = FALSE)

# Attach confirmed totals to every county polygon row.
state_join <- left_join(counties_9_26_2020, report_09_26_2020, by = c("Key"))

# County-level choropleth on a log10 fill scale; counties with no match are
# drawn white.
ggplot(data = us_9_26_2020, mapping = aes(x = long, y = lat, group = group)) +
  coord_fixed(1.3) +
  geom_polygon(data = state_join, aes(fill = Confirmed)) +
  # Draw state outlines after the county fills so the opaque county polygons
  # do not paint over them.
  borders("state", colour = "black") +
  scale_fill_gradientn(
    colors = brewer.pal(n = 5, name = "GnBu"),
    breaks = c(1, 10, 100, 1000, 10000, 100000),
    trans = "log10", na.value = "White"
  ) +
  ggtitle("Number of Confirmed Cases by US County: 9/26/2020") +
  theme_dark()
## Warning: Transformation introduced infinite values in discrete y-axis

# Confirmed totals per Massachusetts Admin2 value from the 04-02-2020 report.
# Names are lower-cased so they can be joined to the county map `subregion`.
daily_report <-
  "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/04-02-2020.csv" %>%
  url() %>%
  read_csv() %>%
  rename(Long = Long_) %>%
  filter(Province_State == "Massachusetts") %>%
  group_by(Admin2) %>%
  summarise(Confirmed = sum(Confirmed)) %>%
  mutate(Admin2 = tolower(Admin2))
## Parsed with column specification:
## cols(
##   FIPS = col_double(),
##   Admin2 = col_character(),
##   Province_State = col_character(),
##   Country_Region = col_character(),
##   Last_Update = col_character(),
##   Lat = col_double(),
##   Long_ = col_double(),
##   Confirmed = col_double(),
##   Deaths = col_double(),
##   Recovered = col_double(),
##   Active = col_double(),
##   Combined_Key = col_character()
## )
## `summarise()` ungrouping output (override with `.groups` argument)
# Massachusetts subsets of the state and county map data.
us <- map_data("state")
ma_us <- subset(us, region == "massachusetts")
counties <- map_data("county")
ma_county <- subset(counties, region == "massachusetts")
# Attach confirmed totals to the county polygons; map `subregion` is matched
# against the report's lower-cased `Admin2`.
state_join <- left_join(ma_county, daily_report, by = c("subregion" = "Admin2"))

# County choropleth for Massachusetts on a log10 fill scale.
ggplot(data = ma_county, mapping = aes(x = long, y = lat, group = group)) +
  coord_fixed(1.3) +
  geom_polygon(data = state_join, aes(fill = Confirmed), color = "white") +
  scale_fill_gradientn(
    colors = brewer.pal(n = 5, name = "BuGn"),
    trans = "log10"
  ) +
  # Title previously ended with a stray apostrophe.
  labs(title = "COVID-19 Confirmed Cases in Massachusetts")

# Preview the Massachusetts county polygon rows.
head(ma_county)
##            long      lat group order        region  subregion
## 37755 -70.67435 41.73997  1184 37755 massachusetts barnstable
## 37756 -70.53683 41.79727  1184 37756 massachusetts barnstable
## 37757 -70.51392 41.78008  1184 37757 massachusetts barnstable
## 37758 -70.47954 41.75716  1184 37758 massachusetts barnstable
## 37759 -70.41078 41.73425  1184 37759 massachusetts barnstable
## 37760 -70.33630 41.72279  1184 37760 massachusetts barnstable
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
# Interactive (plotly) version of the Massachusetts county choropleth, with
# axis text, ticks, panel border and panel background removed.
ggplotly(
  ggplot(data = ma_county, aes(x = long, y = lat, group = group)) +
    coord_fixed(1.3) +
    geom_polygon(data = state_join, aes(fill = Confirmed), color = "black") +
    scale_fill_gradientn(
      colours = wes_palette("Zissou1", 100, type = "continuous")
    ) +
    # Set the title once; the original repeated the identical ggtitle() call.
    ggtitle("COVID-19 Cases in MA") +
    labs(x = NULL, y = NULL) +
    theme(
      panel.border = element_blank(),
      panel.background = element_blank(),
      axis.ticks = element_blank(),
      axis.text = element_blank()
    )
)
## Warning: `group_by_()` is deprecated as of dplyr 0.7.0.
## Please use `group_by()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.

Exercise 4

# Confirmed totals per Texas Admin2 value from the 09-26-2020 report.
# Names are lower-cased so they can be joined to the county map `subregion`.
daily_report_9_26_2020_texas <-
  "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/09-26-2020.csv" %>%
  url() %>%
  read_csv() %>%
  rename(Long = Long_) %>%
  filter(Province_State == "Texas") %>%
  group_by(Admin2) %>%
  summarise(Confirmed = sum(Confirmed)) %>%
  mutate(Admin2 = tolower(Admin2))
## Parsed with column specification:
## cols(
##   FIPS = col_double(),
##   Admin2 = col_character(),
##   Province_State = col_character(),
##   Country_Region = col_character(),
##   Last_Update = col_character(),
##   Lat = col_double(),
##   Long_ = col_double(),
##   Confirmed = col_double(),
##   Deaths = col_double(),
##   Recovered = col_double(),
##   Active = col_double(),
##   Combined_Key = col_character(),
##   Incidence_Rate = col_double(),
##   `Case-Fatality_Ratio` = col_double()
## )
## `summarise()` ungrouping output (override with `.groups` argument)
# Texas subsets of the state and county map data.
texas_cali <- map_data("state")
# Subset the state map loaded on the line above rather than the global `us`
# left over from an earlier chunk, so this chunk does not depend on it.
texas_us <- subset(texas_cali, region == "texas")
counties_texas <- map_data("county")
texas_county <- subset(counties_texas, region == "texas")
# Attach confirmed totals to the county polygons; map `subregion` is matched
# against the report's lower-cased `Admin2`.
state_join_texas <- left_join(
  texas_county, daily_report_9_26_2020_texas,
  by = c("subregion" = "Admin2")
)

# Interactive (plotly) county choropleth for Texas, with axis text, ticks,
# panel border and panel background removed.
ggplotly(
  ggplot(data = texas_county, aes(x = long, y = lat, group = group)) +
    coord_fixed(1.3) +
    geom_polygon(data = state_join_texas, aes(fill = Confirmed), color = "black") +
    scale_fill_gradientn(
      colours = wes_palette("Zissou1", 100, type = "continuous")
    ) +
    # Set the title once; the original repeated the identical ggtitle() call.
    ggtitle("COVID-19 Cases in Texas: 9/26/2020") +
    labs(x = NULL, y = NULL) +
    theme(
      panel.border = element_blank(),
      panel.background = element_blank(),
      axis.ticks = element_blank(),
      axis.text = element_blank()
    )
)

Exercise 5

## Parsed with column specification:
## cols(
##   `Province/State` = col_character(),
##   `Country/Region` = col_character(),
##   `Last Update` = col_datetime(format = ""),
##   Confirmed = col_double(),
##   Deaths = col_double(),
##   Recovered = col_double(),
##   Latitude = col_double(),
##   Longitude = col_double()
## )
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 6 x 2
##   `Country/Region` Confirmed
##   <chr>                <dbl>
## 1 Mainland China       79826
## 2 South Korea           3736
## 3 Italy                 1694
## 4 Iran                   978
## 5 Others                 705
## 6 Japan                  256

—————————————————————————— Here we will be looking at the dynamics of COVID-19 confirmed cases in the United States relative to other countries from March through September 2020. ——————————————————————————

# Bar chart of log2-transformed confirmed counts from `world_data_3`
# (built in a chunk not shown here), with axis labels rotated via las = 2.
barplot(
  log2(Confirmed) ~ .,
  data = world_data_3,
  main = "Top 10 Confirmed Cases in the World: MARCH",
  col = "orange",
  las = 2
)

—————————————————————————— As we can see in the graph above, in March the United States is not among the 10 countries with the most confirmed cases in the world. ——————————————————————————

## Parsed with column specification:
## cols(
##   FIPS = col_double(),
##   Admin2 = col_character(),
##   Province_State = col_character(),
##   Country_Region = col_character(),
##   Last_Update = col_datetime(format = ""),
##   Lat = col_double(),
##   Long_ = col_double(),
##   Confirmed = col_double(),
##   Deaths = col_double(),
##   Recovered = col_double(),
##   Active = col_double(),
##   Combined_Key = col_character(),
##   Incidence_Rate = col_double(),
##   `Case-Fatality_Ratio` = col_double()
## )
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 6 x 2
##   Country_Region Confirmed
##   <chr>              <dbl>
## 1 US               6073840
## 2 Brazil           3950931
## 3 India            3769523
## 4 Russia            997072
## 5 Peru              652037
## 6 South Africa      628259
# Bar chart of confirmed counts from `world_data_9` (built in a chunk not
# shown here), with axis labels rotated via las = 2.
barplot(
  Confirmed ~ .,
  data = world_data_9,
  main = "Top 10 Confirmed Cases in the World: SEPTEMBER",
  col = "cornflowerblue",
  las = 2
)

—————————————————————————— However, as we can see in the bar plot above, by September the United States ranks as the country with the highest number of confirmed cases in the world. So where did it all start? ——————————————————————————

## Parsed with column specification:
## cols(
##   FIPS = col_character(),
##   Admin2 = col_character(),
##   Province_State = col_character(),
##   Country_Region = col_character(),
##   Last_Update = col_datetime(format = ""),
##   Lat = col_double(),
##   Long_ = col_double(),
##   Confirmed = col_double(),
##   Deaths = col_double(),
##   Recovered = col_double(),
##   Active = col_double(),
##   Combined_Key = col_character()
## )
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 6 x 2
##   Province_State Confirmed
##   <chr>              <dbl>
## 1 alabama              517
## 2 alaska                56
## 3 american samoa         0
## 4 arizona              508
## 5 arkansas             335
## 6 california          3899

—————————————————————————— Let's look at the confirmed cases in the United States by state. ——————————————————————————

—————————————————————————— Hover over each state to find out the exact counts. Alternatively, we can see from the color gradients that there seem to be more reported cases on the east coast than on the west coast. Now let's look at which specific states reported the highest counts of confirmed cases. ——————————————————————————

# Bar chart of confirmed counts from `daily_report_US_3` (built in a chunk
# not shown here), with axis labels rotated via las = 2.
barplot(
  Confirmed ~ .,
  data = daily_report_US_3,
  main = "Top 10 States with highest Confirmed Cases: MARCH",
  col = "green",
  las = 2
)

_—————————————————————————— Here we can clearly see the gap between the two highest east coast states and the west coast states. New York and New Jersey have far higher counts than California and Washington. In fact, New Jersey, the second-highest state on the east coast, has more cases than Washington and California combined. Similarly, New York has more cases than the remaining 9 states combined.

This goes to show that the pandemic in the United States may have started on the east coast. Now let's see the dynamics of the pandemic in the US over time. _——————————————————————————

## Parsed with column specification:
## cols(
##   FIPS = col_double(),
##   Admin2 = col_character(),
##   Province_State = col_character(),
##   Country_Region = col_character(),
##   Last_Update = col_character(),
##   Lat = col_double(),
##   Long_ = col_double(),
##   Confirmed = col_double(),
##   Deaths = col_double(),
##   Recovered = col_double(),
##   Active = col_double(),
##   Combined_Key = col_character(),
##   Incidence_Rate = col_double(),
##   `Case-Fatality_Ratio` = col_double()
## )
## `summarise()` ungrouping output (override with `.groups` argument)

—————————————————————————— The graph above shows us that over time the confirmed cases in the US increased at a very high rate. Compared to the graph from March, which is mostly yellow, we now see that the majority of states are green. Some states, like Texas and California, seem to have among the highest counts. Let's look at this in more detail. ——————————————————————————

_—————————————————————————— Here we can see the differences between states more precisely. The top 2 states with the highest counts are California and Texas, which are in the western half of the country. However, Florida has also experienced a sudden increase in counts (college partying?).

Hence, we can finally summarize the dynamics of the 2020 COVID-19 outbreak in the United States. The outbreak mainly started on the east coast earlier in the year and over time spread to the west coast and southward to Florida. There are various factors behind the pattern that we see. One of the main ones is that these states initially had low counts. Why does that matter? States that were considered “safe” initially tended to loosen up on social distancing and other safety measures, resulting in such a shift. _——————————————————————————